VERSION 5.00
Begin VB.Form Form1 
   Caption         =   "Text Extraction"
   ClientHeight    =   2580
   ClientLeft      =   60
   ClientTop       =   348
   ClientWidth     =   4680
   LinkTopic       =   "Form1"
   ScaleHeight     =   2580
   ScaleWidth      =   4680
   StartUpPosition =   3  'Windows-Standard
   Begin VB.CommandButton Command1 
      Caption         =   "Create text file"
      Height          =   855
      Left            =   840
      TabIndex        =   0
      Top             =   720
      Width           =   2895
   End
End
Attribute VB_Name = "Form1"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Option Explicit
' Win32 shell function (ANSI variant) imported from shell32.dll.
' In this module it is only used to open the generated text file with the "open" verb.
Private Declare Function ShellExecuteA Lib "shell32.dll" (ByVal hWnd As Long, ByVal lpOperation As String, ByVal lpFile As String, ByVal lpParameters As String, ByVal lpDirectory As String, ByVal nShowCmd As Long) As Long
' PDF object used by all procedures of this form. It is declared WithEvents so that
' the pdf_Error handler below receives the error events raised by the object.
Private WithEvents pdf As CPDF 'Activate event support
Attribute pdf.VB_VarHelpID = -1

' Writes the separator line that introduces the text of one page.
'   file    - number of a file that is already open for binary writing
'   PageNum - page number shown in the separator; for every page after
'             the first one an additional line break is written in front of it
Private Sub WritePageIdentifier(ByVal file As Long, ByVal PageNum As Long)
   Dim headerLine As String
   Dim buffer As String

   headerLine = "%----------------------- Page " & Str(PageNum) & " -----------------------------" & vbCrLf
   ' Separate this page from the text of the previous one.
   If PageNum > 1 Then headerLine = vbCrLf & headerLine

   ' The converted text is stored in a variable before it is passed to Put,
   ' in the same way as the rest of this module writes its output.
   buffer = StrConv(headerLine, vbUnicode)
   Put file, , buffer
End Sub

' Event handler for errors reported by the pdf object.
'   Description - message text, shown to the user as is
'   ErrType     - error type supplied by the event source (not evaluated here)
'   DoBreak     - output flag; always set to True to request that processing stops
Private Sub pdf_Error(ByVal Description As String, ByVal ErrType As Long, DoBreak As Boolean)
   MsgBox Prompt:=Description, Buttons:=vbExclamation, Title:="Error"
   ' Any reported error aborts the current operation.
   DoBreak = True
End Sub

' Handles a click on the "Create text file" button.
' Imports ../../../dynapdf_help.pdf, flattens annotations and form fields, extracts the
' text of every page and writes it, preceded by a byte order mark, to out.txt in the
' application directory. The finished file is then opened via ShellExecuteA.
' Errors reported by the pdf object are shown by pdf_Error; run-time errors are shown
' by the handler at the end of this procedure.
Private Sub Command1_Click()
   Dim Path As String
   Dim file As Long
   Dim fileIsOpen As Boolean ' True between Open and Close so that the error handler can release the handle
   Dim s As String
   Dim i As Long
   Dim outText As String
   Dim errText As String

   On Error GoTo Err

   ' The instance is released at the end of every run and after a fatal error,
   ' so it must be re-created when the button is clicked again.
   If pdf Is Nothing Then Set pdf = New CPDF

   ' Error messages and warnings are passed to the error event function.
   If Not pdf.CreateNewPDFA("") Then ' The output file is opened later
      Exit Sub
   End If

   ' External CMaps should always be loaded when processing text from PDF files.
   ' See the description of GetPageText() for further information.
   pdf.SetCMapDir "../../../Resource/CMap/", lcmDelayed Or lcmRecursive

   ' We avoid the conversion of pages to templates
   pdf.SetImportFlags ifImportAll Or ifImportAsPage
   If pdf.OpenImportFile("../../../dynapdf_help.pdf", ptOpen, "") < 0 Then
      pdf.FreePDF
      Exit Sub
   End If
   pdf.ImportPDFFile 1, 1#, 1#
   pdf.CloseImportFile

   ' We flatten markup annotations and form fields so that we can extract the text in these objects too.
   pdf.FlattenAnnots affMarkupAnnots
   pdf.FlattenForm

   ' We write the output file into the application directory.
   Path = App.Path + "\out.txt"

   ' Open ... For Binary does not truncate an existing file. Remove the result of a previous
   ' run so that no stale text remains behind the newly written data.
   If Len(Dir$(Path)) > 0 Then Kill Path

   file = FreeFile
   Open Path For Binary Access Write As file
   fileIsOpen = True

   ' Byte Order Mark (BOM)
   s = StrConv(ChrW$(&HFEFF), vbUnicode)
   Put file, , s

   For i = 1 To pdf.GetPageCount
      WritePageIdentifier file, i
      ' It is not recommended to sort text on the y-axis since this sometimes causes strange results.
      If pdf.ExtractText(i, TTextExtractionFlags.tefDeleteOverlappingText Or TTextExtractionFlags.tefSortTextX, outText) Then
         ' It is important to store the conversion result in a string variable. Otherwise VB adds some garbage to the file...
         s = StrConv(outText, vbUnicode)
         Put file, , s
      End If
   Next i

   Close file
   fileIsOpen = False
   Set pdf = Nothing

   ShellExecuteA Me.hWnd, "open", Path, vbNullString, vbNullString, 1
   Exit Sub
Err:
   ' Read the error information first so that the cleanup below cannot disturb it.
   errText = Err.Source & Chr(13) & Err.Description
   ' Do not leave the output file locked when the error occurred after it was opened.
   If fileIsOpen Then Close file
   Set pdf = Nothing
   MsgBox errText, vbCritical, "Fatal error"
End Sub

' Creates the pdf object when the form is loaded.
' Any run-time error raised while creating it is reported to the user
' as an out-of-memory condition.
Private Sub Form_Load()
   On Error GoTo CreateFailed

   Set pdf = New CPDF
   Exit Sub

CreateFailed:
   MsgBox Prompt:="Out of memory!", Buttons:=vbCritical, Title:="Fatal error"
End Sub

' Drops the form's reference to the pdf object when the form terminates.
' This is a no-op if the reference was already released by Command1_Click.
Private Sub Form_Terminate()
   Set pdf = Nothing
End Sub
